Convert the SQLite DB

The OSM data is fundamentally quite relational, and so should fit well with a relational DB (though we can expect disk usage to be high). Python has SQLite built-in, so it's a natural choice. (Of course, enterprise levels DBs and GIS addons would make more sense for very heavily lifting).

Here I use the Isle of Man data, from http://download.geofabrik.de/europe.html, to experiment on.


In [1]:
import sys
sys.path.insert(0, "..")

In [2]:
import sqlite3
import datetime
import os

In [3]:
import osmdigest.sqlite as sq

In [4]:
# Delete the database if it already exists.
try:
    os.remove("demo.db")
except FileNotFoundError:
    pass

In [5]:
import os
filename = os.path.join("//media", "disk", "OSM_Data", "isle-of-man-latest.osm.bz2")
#filename = os.path.join("..", "..", "..", "Data", "isle-of-man-latest.osm.bz2")

In [6]:
start = datetime.datetime.now()
sq.convert(filename, "demo.db")

print(datetime.datetime.now() - start)


0:00:11.004262

In [7]:
db = sq.OSM_SQLite("demo.db")

In [8]:
for i, node in zip(range(10), db.nodes()):
    print(node)


Node(13802141 @ [53.5104366,-3.1942821] {})
Node(13802146 @ [53.448266,-3.0242517] {})
Node(13802147 @ [53.4165984,-3.0086659] {})
Node(13802148 @ [53.4056923,-2.9994948] {})
Node(23010534 @ [53.7770793,-3.0541303] {'highway': 'traffic_signals'})
Node(283477866 @ [54.0967845,-4.7776061] {})
Node(283477867 @ [54.2114586,-4.6541152] {})
Node(283477868 @ [54.1196968,-4.6130283] {})
Node(283477869 @ [54.222377,-4.5905299] {})
Node(283477871 @ [54.2061508,-4.5991698] {})

In [9]:
for i, node in zip(range(10), db.ways()):
    print(node)


Way(22772385 ->  [292039814, 4375877404, 4375877408, 292039921, 292039778, 13802141, 4448539080, 13802146, 472475209, 472475210, 472475212, 472475213, 472475211, 472475214, 13802147, 453404917, 453404920, 453404923, 453404925, 13802148, 621886775] {'duration': '2:45', 'name': 'Isle of Man - Liverpool', 'operator': 'Isle of Man Steam Packet Company', 'route': 'ferry'})
Way(25985909 ->  [283508770, 283480687, 1747483705, 1747483685, 1747483620, 283497137, 1747483673, 1747483715, 283515694, 283515142, 1747483793, 1747483592, 1747483782, 283485720, 1747483726, 1747483618, 283505825, 1747483814, 1747483738, 1747483727, 1747483721, 1747483706, 1747483657, 1747483713, 1747483794, 1747483788, 1747483694, 1747483684, 1747483729, 283519722, 283493902, 1747511279, 283510772, 283483149, 283499649, 283480978] {'source': 'OS Second War Revision 1940. Pub 1942', 'waterway': 'stream'})
Way(25985910 ->  [283509867, 283505044] {'name': 'Laurel Avenue', 'highway': 'residential', 'surface': 'paved', 'abutters': 'residential', 'maxspeed': '20 mph'})
Way(25985912 ->  [283515609, 962910141, 283511741, 283506297, 283497195, 283491629, 283487231] {'source': 'Isle of Man Government aerial imagery (2001)', 'highway': 'service', 'maxspeed': '30 mph'})
Way(25985913 ->  [283499174, 781577540, 781577544, 283505382] {'foot': 'yes', 'name': 'Marine Parade', 'lanes': '2', 'bicycle': 'yes', 'highway': 'unclassified', 'surface': 'asphalt', 'abutters': 'retail', 'maxspeed': '30 mph', 'sidewalk': 'both'})
Way(25985915 ->  [283486330, 746330946, 826954675, 826954461, 746330947, 2244295314] {'highway': 'footway', 'surface': 'paved'})
Way(25985916 ->  [283486004, 283496434, 283502912, 283508345] {'name': 'Derby Square Lane', 'highway': 'service', 'maxspeed': '30 mph'})
Way(25985917 ->  [283490074, 810751896, 283505739, 810750888, 810750343, 810751157, 810751357, 810752110, 810751632, 810750622, 283512512, 283519121, 810750825, 283500697, 283494646, 283514666, 283511853, 283507238, 283523552, 810751200, 810751473, 810751673, 810750663, 810751940, 810750931, 1219059151, 1219059207, 810751137, 810750729, 283517659, 810751875, 810751399, 810750306, 810750867, 810751338, 810750601, 810751609, 810751809, 810750801, 283507694, 283484160, 810752088, 1186420773] {'boundary': 'administrative', 'admin_level': '6'})
Way(25985918 ->  [1185304677, 1185304350, 283497835, 283480397] {'highway': 'service'})
Way(25985919 ->  [283511310, 2469484449, 2469484450, 2469484451, 283507264, 283522231, 283517667, 283489576, 1721218706, 283482420, 1721218705, 283499613, 1721218704, 1721218703, 283494485] {'ref': 'A22', 'name': 'Ballanard Road', 'highway': 'primary', 'surface': 'paved', 'maxspeed': 'none'})

Similarly for relations. It is worth noting something a little surprising about the input data here. The first relation returned is "Scotland". This is because, in OSM data, the boundary for Scotland forms part of the boundary of the Isle of Man. If you investigate further, you'll find that some of the nodes and ways in this relation are not included in the XML file, because e.g. the label node for Scotland is not part of the Isle of Man.


In [10]:
for i, node in zip(range(2), db.relations()):
    print(node)


Relation(58446 ->  [Member(type='node', ref=470649732, role='label'), Member(type='node', ref=17898859, role='admin_centre'), Member(type='way', ref=28421934, role='outer'), Member(type='way', ref=28421899, role='outer'), Member(type='way', ref=28421901, role='outer'), Member(type='way', ref=144804249, role='outer'), Member(type='way', ref=160844050, role='outer'), Member(type='way', ref=229560139, role='outer'), Member(type='way', ref=141730497, role='outer'), Member(type='way', ref=142982510, role='outer'), Member(type='way', ref=191586935, role='outer'), Member(type='way', ref=191586938, role='outer'), Member(type='way', ref=147077889, role='outer'), Member(type='way', ref=179086121, role='outer'), Member(type='way', ref=147078911, role='outer'), Member(type='way', ref=141730506, role='outer'), Member(type='way', ref=140550301, role='outer'), Member(type='way', ref=369520845, role='outer'), Member(type='way', ref=139314855, role='outer'), Member(type='way', ref=139487830, role='outer'), Member(type='way', ref=139312409, role='outer'), Member(type='way', ref=139315081, role='outer'), Member(type='way', ref=139489045, role='outer'), Member(type='way', ref=139312404, role='outer'), Member(type='way', ref=232929523, role='outer'), Member(type='way', ref=232929524, role='outer'), Member(type='way', ref=232928850, role='outer'), Member(type='way', ref=160844041, role='outer'), Member(type='way', ref=160844037, role='outer'), Member(type='way', ref=160844042, role='outer'), Member(type='way', ref=28421793, role='outer'), Member(type='way', ref=76169009, role='outer'), Member(type='way', ref=160695674, role='outer'), Member(type='way', ref=160695684, role='outer'), Member(type='way', ref=160695681, role='outer'), Member(type='way', ref=160695673, role='outer'), Member(type='way', ref=160695683, role='outer'), Member(type='way', ref=28421711, role='outer'), Member(type='way', ref=28421710, role='outer'), Member(type='way', ref=160695677, role='outer'), Member(type='way', ref=160695678, role='outer'), Member(type='way', ref=160695676, role='outer'), Member(type='way', ref=160695675, role='outer'), Member(type='way', ref=28421704, role='outer'), Member(type='way', ref=28421930, role='outer'), Member(type='way', ref=28421703, role='outer'), Member(type='way', ref=28421702, role='outer'), Member(type='way', ref=28421701, role='outer'), Member(type='way', ref=53636983, role='outer'), Member(type='way', ref=53636985, role='outer'), Member(type='way', ref=28421696, role='outer'), Member(type='way', ref=28421692, role='outer'), Member(type='way', ref=28421691, role='outer'), Member(type='way', ref=28421690, role='outer'), Member(type='way', ref=28421689, role='outer'), Member(type='way', ref=28421688, role='outer'), Member(type='way', ref=28421686, role='outer'), Member(type='way', ref=28421684, role='outer'), Member(type='way', ref=28421683, role='outer'), Member(type='way', ref=28421682, role='outer'), Member(type='way', ref=28421681, role='outer'), Member(type='way', ref=28421927, role='outer'), Member(type='way', ref=160784070, role='outer'), Member(type='way', ref=28421678, role='outer'), Member(type='way', ref=28421926, role='outer'), Member(type='way', ref=28421677, role='outer'), Member(type='way', ref=28421675, role='outer'), Member(type='way', ref=28421674, role='outer'), Member(type='way', ref=28421673, role='outer'), Member(type='way', ref=160844043, role='outer')] {'admin_level': '4', 'alt_name:eo': 'Skotio;Skotujo', 'alt_name:vi': 'Tô Cách Lan;Xcốt-lan', 'boundary': 'administrative', 'is_in:continent': 'Europe', 'is_in:country': 'United Kingdom', 'ISO3166-2': 'GB-SCT', 'name': 'Scotland', 'name:ar': 'إسكتلندا', 'name:ca': 'Escòcia', 'name:cy': 'Yr Alban', 'name:de': 'Schottland', 'name:en': 'Scotland', 'name:eo': 'Skotlando', 'name:es': 'Escocia', 'name:eu': 'Eskozia', 'name:fr': 'Écosse', 'name:fy': 'Skotlân', 'name:gd': 'Alba', 'name:gv': 'Nalbin', 'name:hr': 'Škotska', 'name:hsb': 'Šotiska', 'name:hu': 'Skócia', 'name:ia': 'Scotia', 'name:io': 'Skotia', 'name:it': 'Scozia', 'name:la': 'Scotia', 'name:lt': 'Škotija', 'name:mi': 'Koterana', 'name:nds': 'Schottland', 'name:nl': 'Schotland', 'name:pl': 'Szkocja', 'name:pt': 'Escócia', 'name:ru': 'Шотландия', 'name:sco': 'Scotland', 'name:sk': 'Škótsko', 'name:sv': 'Skottland', 'name:tok': 'ma Sukosi', 'name:tzl': 'Escot', 'name:vo': 'Skotän', 'official_name:it': 'Nazione di Scozia', 'place': 'state', 'ref:gss': 'S92000003', 'ref:nuts:1': 'UKM', 'source:ref:gss': 'ONS_OpenData', 'type': 'boundary', 'wikidata': 'Q22', 'wikipedia': 'en:Scotland'})
Relation(58447 ->  [Member(type='way', ref=263720062, role='outer'), Member(type='way', ref=457735336, role='outer'), Member(type='way', ref=34562551, role='outer'), Member(type='way', ref=45204540, role='outer'), Member(type='way', ref=220076680, role='outer'), Member(type='way', ref=220076675, role='outer'), Member(type='way', ref=217379075, role='outer'), Member(type='way', ref=205966536, role='outer'), Member(type='way', ref=205966518, role='outer'), Member(type='way', ref=205966527, role='outer'), Member(type='way', ref=438310884, role='outer'), Member(type='way', ref=205896941, role='outer'), Member(type='way', ref=205896937, role='outer'), Member(type='way', ref=205896942, role='outer'), Member(type='way', ref=205896943, role='outer'), Member(type='way', ref=205896948, role='outer'), Member(type='way', ref=205896949, role='outer'), Member(type='way', ref=205896946, role='outer'), Member(type='way', ref=327842395, role='outer'), Member(type='way', ref=327842339, role='outer'), Member(type='way', ref=327652000, role='outer'), Member(type='way', ref=327575521, role='outer'), Member(type='way', ref=400030550, role='outer'), Member(type='way', ref=329182124, role='outer'), Member(type='way', ref=400030551, role='outer'), Member(type='way', ref=329182119, role='outer'), Member(type='way', ref=327567854, role='outer'), Member(type='way', ref=103331116, role='outer'), Member(type='way', ref=103244036, role='outer'), Member(type='way', ref=103243855, role='outer'), Member(type='way', ref=103243720, role='outer'), Member(type='way', ref=103242925, role='outer'), Member(type='way', ref=103242652, role='outer'), Member(type='way', ref=103242640, role='outer'), Member(type='way', ref=103242636, role='outer'), Member(type='way', ref=103363497, role='outer'), Member(type='way', ref=103238637, role='outer'), Member(type='way', ref=103234149, role='outer'), Member(type='way', ref=103233907, role='outer'), Member(type='way', ref=103242633, role='outer'), Member(type='way', ref=103083552, role='outer'), Member(type='way', ref=103083491, role='outer'), Member(type='way', ref=103229499, role='outer'), Member(type='way', ref=101122080, role='outer'), Member(type='way', ref=256424766, role='outer'), Member(type='way', ref=101120705, role='outer'), Member(type='way', ref=101120498, role='outer'), Member(type='way', ref=101122079, role='outer'), Member(type='way', ref=101120541, role='outer'), Member(type='way', ref=101120492, role='outer'), Member(type='way', ref=101120374, role='outer'), Member(type='way', ref=101120877, role='outer'), Member(type='way', ref=101068792, role='outer'), Member(type='way', ref=101068692, role='outer'), Member(type='way', ref=101068654, role='outer'), Member(type='way', ref=101068456, role='outer'), Member(type='way', ref=101068379, role='outer'), Member(type='way', ref=100907663, role='outer'), Member(type='way', ref=100907957, role='outer'), Member(type='way', ref=35072901, role='outer'), Member(type='way', ref=128392990, role='outer'), Member(type='way', ref=72894791, role='outer'), Member(type='way', ref=39498763, role='outer'), Member(type='way', ref=28421802, role='outer'), Member(type='way', ref=160784092, role='outer'), Member(type='way', ref=28421801, role='outer'), Member(type='way', ref=28421800, role='outer'), Member(type='way', ref=160784074, role='outer'), Member(type='way', ref=160844040, role='outer'), Member(type='way', ref=160844037, role='outer'), Member(type='way', ref=160844041, role='outer'), Member(type='way', ref=232928850, role='outer'), Member(type='way', ref=232929524, role='outer'), Member(type='way', ref=232929523, role='outer'), Member(type='way', ref=139312404, role='outer'), Member(type='way', ref=139489045, role='outer'), Member(type='way', ref=139315081, role='outer'), Member(type='way', ref=139312409, role='outer'), Member(type='way', ref=139487830, role='outer'), Member(type='way', ref=139314855, role='outer'), Member(type='way', ref=369520845, role='outer'), Member(type='way', ref=140550301, role='outer'), Member(type='way', ref=141730506, role='outer'), Member(type='way', ref=147078911, role='outer'), Member(type='way', ref=179086121, role='outer'), Member(type='way', ref=147077889, role='outer'), Member(type='way', ref=191586938, role='outer'), Member(type='way', ref=191586935, role='outer'), Member(type='way', ref=142982510, role='outer'), Member(type='way', ref=141730497, role='outer'), Member(type='way', ref=229560139, role='outer'), Member(type='way', ref=160844050, role='outer'), Member(type='way', ref=47838397, role='outer'), Member(type='way', ref=28421666, role='outer'), Member(type='way', ref=28421664, role='outer'), Member(type='way', ref=28421663, role='outer'), Member(type='way', ref=28421662, role='outer'), Member(type='way', ref=28421659, role='outer'), Member(type='way', ref=28421653, role='outer'), Member(type='way', ref=28421645, role='outer'), Member(type='way', ref=160784068, role='outer'), Member(type='way', ref=28421639, role='outer'), Member(type='way', ref=53268792, role='outer'), Member(type='way', ref=28421637, role='outer'), Member(type='way', ref=28421635, role='outer'), Member(type='way', ref=28421633, role='outer'), Member(type='way', ref=28421631, role='outer'), Member(type='way', ref=28421630, role='outer'), Member(type='way', ref=28421629, role='outer'), Member(type='way', ref=28421627, role='outer'), Member(type='way', ref=132078258, role='outer'), Member(type='way', ref=132078261, role='outer'), Member(type='way', ref=28421878, role='outer'), Member(type='way', ref=28421876, role='outer'), Member(type='way', ref=160784084, role='outer'), Member(type='way', ref=28421874, role='outer'), Member(type='way', ref=160784081, role='outer'), Member(type='way', ref=28421873, role='outer'), Member(type='way', ref=28421872, role='outer'), Member(type='way', ref=28421871, role='outer'), Member(type='way', ref=28421868, role='outer'), Member(type='way', ref=28421864, role='outer'), Member(type='way', ref=28421861, role='outer'), Member(type='way', ref=28421857, role='outer'), Member(type='way', ref=28421855, role='outer'), Member(type='way', ref=28421853, role='outer'), Member(type='way', ref=28421852, role='outer'), Member(type='way', ref=160696657, role='outer'), Member(type='way', ref=160696656, role='outer'), Member(type='way', ref=160696653, role='outer'), Member(type='way', ref=39498765, role='outer'), Member(type='way', ref=257549576, role='outer'), Member(type='way', ref=46095782, role='outer'), Member(type='way', ref=195449843, role='outer'), Member(type='way', ref=257549577, role='outer'), Member(type='way', ref=257563732, role='outer'), Member(type='way', ref=205959116, role='outer'), Member(type='way', ref=396609752, role='outer'), Member(type='way', ref=396618161, role='outer'), Member(type='way', ref=205976472, role='outer'), Member(type='way', ref=396773075, role='outer'), Member(type='way', ref=27336799, role='outer'), Member(type='way', ref=165549422, role='outer'), Member(type='way', ref=396786057, role='outer'), Member(type='way', ref=165549409, role='outer'), Member(type='way', ref=414381571, role='outer'), Member(type='way', ref=414381572, role='outer'), Member(type='way', ref=28216462, role='outer'), Member(type='way', ref=28216457, role='outer'), Member(type='way', ref=414423561, role='outer'), Member(type='way', ref=28216456, role='outer'), Member(type='way', ref=205729136, role='outer'), Member(type='way', ref=205729123, role='outer'), Member(type='way', ref=205729142, role='outer'), Member(type='way', ref=205729132, role='outer'), Member(type='way', ref=205729141, role='outer'), Member(type='way', ref=205729130, role='outer'), Member(type='way', ref=205729143, role='outer'), Member(type='way', ref=414458714, role='outer'), Member(type='way', ref=414458723, role='outer'), Member(type='way', ref=414458722, role='outer'), Member(type='way', ref=414458716, role='outer'), Member(type='way', ref=414458725, role='outer'), Member(type='way', ref=414462841, role='outer'), Member(type='way', ref=414462838, role='outer'), Member(type='way', ref=414462837, role='outer'), Member(type='way', ref=414485393, role='outer'), Member(type='way', ref=414485382, role='outer'), Member(type='way', ref=414485417, role='outer'), Member(type='way', ref=414485381, role='outer'), Member(type='way', ref=414485386, role='outer'), Member(type='way', ref=414485384, role='outer'), Member(type='way', ref=414485387, role='outer'), Member(type='way', ref=414485414, role='outer'), Member(type='way', ref=414485383, role='outer'), Member(type='way', ref=414489023, role='outer'), Member(type='way', ref=414491535, role='outer'), Member(type='way', ref=414491530, role='outer'), Member(type='way', ref=414494993, role='outer'), Member(type='way', ref=414494990, role='outer'), Member(type='way', ref=414497994, role='outer'), Member(type='way', ref=414497996, role='outer'), Member(type='way', ref=414497995, role='outer'), Member(type='way', ref=416432073, role='outer'), Member(type='way', ref=416432049, role='outer'), Member(type='way', ref=416432048, role='outer'), Member(type='way', ref=414503012, role='outer'), Member(type='way', ref=414508038, role='outer'), Member(type='way', ref=416432071, role='outer'), Member(type='way', ref=416432045, role='outer'), Member(type='way', ref=416432072, role='outer'), Member(type='way', ref=416432047, role='outer'), Member(type='way', ref=416177085, role='outer'), Member(type='way', ref=416432046, role='outer'), Member(type='way', ref=264184620, role='outer'), Member(type='way', ref=414512811, role='outer'), Member(type='way', ref=264184591, role='outer'), Member(type='way', ref=414860929, role='outer'), Member(type='way', ref=409323332, role='outer'), Member(type='way', ref=414862816, role='outer'), Member(type='way', ref=414865917, role='outer'), Member(type='way', ref=414866903, role='outer'), Member(type='way', ref=413739740, role='outer'), Member(type='way', ref=414868320, role='outer'), Member(type='way', ref=413747636, role='outer'), Member(type='way', ref=413746493, role='outer'), Member(type='way', ref=413733133, role='outer'), Member(type='way', ref=409323320, role='outer'), Member(type='way', ref=413733135, role='outer'), Member(type='way', ref=413739752, role='outer'), Member(type='way', ref=414877555, role='outer'), Member(type='way', ref=10530953, role='outer'), Member(type='way', ref=461035991, role='outer'), Member(type='way', ref=51451167, role='outer'), Member(type='way', ref=461034640, role='outer'), Member(type='way', ref=461034639, role='outer'), Member(type='way', ref=461133606, role='outer'), Member(type='way', ref=461129264, role='outer'), Member(type='way', ref=461134949, role='outer'), Member(type='way', ref=461140350, role='outer'), Member(type='way', ref=461142050, role='outer'), Member(type='way', ref=461140353, role='outer'), Member(type='way', ref=461624739, role='outer'), Member(type='way', ref=43341266, role='outer'), Member(type='way', ref=461624736, role='outer'), Member(type='way', ref=463133630, role='outer'), Member(type='way', ref=357570509, role='outer'), Member(type='way', ref=357570505, role='outer'), Member(type='way', ref=327867524, role='outer'), Member(type='way', ref=327867517, role='outer'), Member(type='way', ref=338686049, role='outer'), Member(type='way', ref=338686051, role='outer'), Member(type='way', ref=338685494, role='outer'), Member(type='way', ref=112260448, role='outer'), Member(type='way', ref=264086030, role='outer'), Member(type='way', ref=264086033, role='outer'), Member(type='way', ref=438269870, role='outer'), Member(type='way', ref=264086032, role='outer'), Member(type='way', ref=264083827, role='outer'), Member(type='way', ref=264083825, role='outer'), Member(type='way', ref=264083823, role='outer'), Member(type='way', ref=438275693, role='outer'), Member(type='way', ref=264083822, role='outer'), Member(type='way', ref=264083834, role='outer'), Member(type='way', ref=264083851, role='outer'), Member(type='way', ref=264083843, role='outer'), Member(type='way', ref=264083847, role='outer'), Member(type='way', ref=264083829, role='outer'), Member(type='way', ref=264083828, role='outer'), Member(type='way', ref=264083820, role='outer'), Member(type='way', ref=438292186, role='outer'), Member(type='node', ref=107775, role='admin_centre'), Member(type='way', ref=477245644, role='outer'), Member(type='way', ref=477245642, role='outer'), Member(type='way', ref=477245640, role='outer'), Member(type='way', ref=477245643, role='outer'), Member(type='way', ref=477245641, role='outer'), Member(type='way', ref=477245639, role='outer'), Member(type='way', ref=477338663, role='outer'), Member(type='way', ref=477245638, role='outer'), Member(type='way', ref=477245637, role='outer'), Member(type='way', ref=477245636, role='outer'), Member(type='way', ref=477571098, role='outer'), Member(type='way', ref=477245634, role='outer'), Member(type='way', ref=477245632, role='outer'), Member(type='way', ref=477245630, role='outer'), Member(type='way', ref=477245631, role='outer'), Member(type='way', ref=477245629, role='outer'), Member(type='way', ref=477567261, role='outer'), Member(type='way', ref=477567258, role='outer'), Member(type='way', ref=477567270, role='outer'), Member(type='way', ref=477567273, role='outer'), Member(type='way', ref=477567259, role='outer'), Member(type='way', ref=477567271, role='outer'), Member(type='way', ref=477567257, role='outer')] {'admin_level': '4', 'alt_name:eo': 'Anglio', 'alt_name:ia': 'Anglia', 'alt_name:nds': 'Ingland', 'boundary': 'administrative', 'ISO3166-2': 'GB-ENG', 'name': 'England', 'name:be': 'Англія', 'name:ca': 'Anglaterra', 'name:cs': 'Anglie', 'name:cy': 'Lloegr', 'name:de': 'England', 'name:el': 'Αγγλία', 'name:en': 'England', 'name:eo': 'Anglujo', 'name:es': 'Inglaterra', 'name:fr': 'Angleterre', 'name:fy': 'Ingelân', 'name:gd': 'Sasainn', 'name:gv': 'Sostyn', 'name:hsb': 'Jendźelska', 'name:hu': 'Anglia', 'name:ia': 'Anglaterra', 'name:io': 'Anglia', 'name:it': 'Inghilterra', 'name:la': 'Anglia', 'name:lt': 'Anglija', 'name:nds': 'England', 'name:nl': 'Engeland', 'name:pl': 'Anglia', 'name:pt': 'Inglaterra', 'name:ru': 'Англия', 'name:sk': 'Anglicko', 'name:sv': 'England', 'name:tok': 'ma Inli', 'name:tr': 'İngiltere', 'name:tzl': 'Anglatzara', 'name:uk': 'Англія', 'name:vi': 'Anh', 'name:vo': 'Linglän', 'name:zh': '英格蘭', 'name:zh-classical': '英格蘭', 'name:zh-simplified': '英格兰', 'name:zh-traditional': '英格蘭', 'old_name:vi': 'Anh Quốc', 'ref:gss': 'E92000001', 'source:ref:gss': 'ONS_OpenData', 'type': 'boundary', 'wikidata': 'Q21', 'wikipedia': 'en:England'})

The library also supports reading full way and relation information, basically doing the necessary "joins" for you... For relations, as noted above, some of the members may not be documented in the XML file extract.


In [11]:
way = next(db.ways())
print("Normal way:")
print(way)
print("And with full node details:")
print(db.complete_way(way))


Normal way:
Way(22772385 ->  [292039814, 4375877404, 4375877408, 292039921, 292039778, 13802141, 4448539080, 13802146, 472475209, 472475210, 472475212, 472475213, 472475211, 472475214, 13802147, 453404917, 453404920, 453404923, 453404925, 13802148, 621886775] {'duration': '2:45', 'name': 'Isle of Man - Liverpool', 'operator': 'Isle of Man Steam Packet Company', 'route': 'ferry'})
And with full node details:
RichWay(22772385 ->  [Node(292039814 @ [54.1478747,-4.4715328] {}), Node(4375877404 @ [54.1470434,-4.4679204] {}), Node(4375877408 @ [54.1487706,-4.4639167] {}), Node(292039921 @ [54.1503034,-4.4585213] {}), Node(292039778 @ [54.1484944,-4.4488409] {}), Node(13802141 @ [53.5104366,-3.1942821] {}), Node(4448539080 @ [53.4744186,-3.0679278] {}), Node(13802146 @ [53.448266,-3.0242517] {}), Node(472475209 @ [53.4448364,-3.0200648] {}), Node(472475210 @ [53.4427327,-3.0177438] {}), Node(472475212 @ [53.4409289,-3.0166448] {}), Node(472475213 @ [53.4393516,-3.0158698] {}), Node(472475211 @ [53.4375632,-3.01512] {}), Node(472475214 @ [53.42904,-3.0122531] {}), Node(13802147 @ [53.4165984,-3.0086659] {}), Node(453404917 @ [53.406048,-3.0035435] {}), Node(453404920 @ [53.4050316,-3.0016958] {}), Node(453404923 @ [53.4050268,-3.0005803] {}), Node(453404925 @ [53.4052348,-3.0000057] {}), Node(13802148 @ [53.4056923,-2.9994948] {}), Node(621886775 @ [53.4061933,-2.9996725] {})] {'duration': '2:45', 'name': 'Isle of Man - Liverpool', 'operator': 'Isle of Man Steam Packet Company', 'route': 'ferry'})

In [12]:
all_relations = db.relations()
for _ in range(4):
    relation = next(all_relations)
relation = db.complete_relation(relation)
relation.members[0], relation.complete_members[0]


Out[12]:
(Member(type='relation', ref=1061144, role='subarea'),
 RichRelation(1061144 ->  [Member(type='way', ref=66301738, role='outer'), Member(type='way', ref=67125050, role='outer'), Member(type='way', ref=67125031, role='outer'), Member(type='way', ref=67125059, role='outer'), Member(type='way', ref=66301714, role='outer'), Member(type='way', ref=67125043, role='outer'), Member(type='way', ref=67125015, role='outer'), Member(type='way', ref=157421918, role='outer'), Member(type='way', ref=25985917, role='outer'), Member(type='way', ref=67125066, role='outer'), Member(type='way', ref=67125058, role='outer'), Member(type='relation', ref=1061141, role='subarea'), Member(type='relation', ref=1061134, role='subarea'), Member(type='relation', ref=1061131, role='subarea')] {'name': 'Ayre', 'type': 'boundary', 'source': 'Isle of Man Government 1:25000 map (2007)', 'boundary': 'administrative', 'wikidata': 'Q2484719', 'admin_level': '6'}))

Experiment with file sizes

The following is not so interesting from a usage perspective...

One problem with this approach is that the resulting .db files are rather large. Given that this format should be considered an intermediate format, to aid data exploration, but not for any form of distribution, my view is that this doesn't matter. However, for academic interest, here we explore the file size, and show that we could store the data in a somewhat more efficient schema, if we really wanted to.


In [13]:
os.stat("demo.db").st_size / 1024 / 1024


Out[13]:
15.63671875

Let's look at the average number of tags etc.


In [14]:
import collections
collections.Counter(len(node.tags) for node in db.nodes())


Out[14]:
Counter({0: 186864,
         1: 3609,
         2: 800,
         3: 1355,
         4: 299,
         5: 146,
         6: 66,
         7: 48,
         8: 49,
         9: 17,
         10: 13,
         11: 5,
         12: 4,
         13: 2,
         14: 2,
         15: 1,
         39: 1})

In [15]:
# Sanity check: this agrees, and is (a bit) slower
import osmdigest.digest as digest

gen = digest.parse(filename)
next(gen), next(gen)
collections.Counter(len(element.tags) for element in gen if element.name == "node")


Out[15]:
Counter({0: 186864,
         1: 3609,
         2: 800,
         3: 1355,
         4: 299,
         5: 146,
         6: 66,
         7: 48,
         8: 49,
         9: 17,
         10: 13,
         11: 5,
         12: 4,
         13: 2,
         14: 2,
         15: 1,
         39: 1})

In [16]:
collections.Counter(len(node.tags) for node in db.ways())


Out[16]:
Counter({0: 63,
         1: 9161,
         2: 6264,
         3: 2103,
         4: 1782,
         5: 1723,
         6: 1188,
         7: 863,
         8: 335,
         9: 197,
         10: 142,
         11: 56,
         12: 33,
         13: 17,
         14: 10,
         15: 8,
         16: 1,
         20: 1})

In [17]:
collections.Counter(len(node.tags) for node in db.relations())


Out[17]:
Counter({0: 5,
         1: 1,
         2: 71,
         3: 21,
         4: 24,
         5: 37,
         6: 30,
         7: 8,
         8: 9,
         9: 1,
         11: 2,
         12: 2,
         13: 1,
         14: 1,
         15: 1,
         19: 1,
         21: 1,
         43: 1,
         48: 1,
         50: 1,
         137: 1,
         152: 1,
         265: 1})

In [18]:
collections.Counter(len(way.nodes) for way in db.ways())


Out[18]:
Counter({2: 2637,
         3: 1421,
         4: 985,
         5: 6338,
         6: 1555,
         7: 2484,
         8: 1171,
         9: 1685,
         10: 522,
         11: 850,
         12: 358,
         13: 603,
         14: 274,
         15: 366,
         16: 207,
         17: 247,
         18: 182,
         19: 172,
         20: 141,
         21: 140,
         22: 94,
         23: 88,
         24: 87,
         25: 76,
         26: 49,
         27: 70,
         28: 64,
         29: 42,
         30: 54,
         31: 44,
         32: 42,
         33: 37,
         34: 38,
         35: 41,
         36: 30,
         37: 35,
         38: 16,
         39: 34,
         40: 27,
         41: 25,
         42: 26,
         43: 23,
         44: 14,
         45: 16,
         46: 19,
         47: 18,
         48: 18,
         49: 12,
         50: 16,
         51: 22,
         52: 15,
         53: 13,
         54: 9,
         55: 13,
         56: 9,
         57: 10,
         58: 10,
         59: 9,
         60: 9,
         61: 11,
         62: 15,
         63: 6,
         64: 7,
         65: 5,
         66: 7,
         67: 8,
         68: 8,
         69: 4,
         70: 8,
         71: 9,
         72: 10,
         73: 6,
         74: 4,
         75: 5,
         76: 1,
         77: 5,
         78: 1,
         79: 3,
         80: 8,
         81: 3,
         82: 7,
         83: 2,
         84: 5,
         85: 6,
         86: 4,
         87: 2,
         88: 4,
         89: 5,
         90: 2,
         91: 3,
         92: 2,
         93: 3,
         94: 2,
         95: 2,
         96: 4,
         97: 4,
         99: 1,
         100: 2,
         101: 1,
         102: 4,
         103: 5,
         105: 2,
         106: 1,
         107: 3,
         108: 3,
         109: 1,
         111: 7,
         113: 4,
         114: 3,
         115: 1,
         116: 1,
         117: 2,
         118: 2,
         119: 2,
         120: 4,
         121: 1,
         122: 3,
         123: 1,
         124: 2,
         125: 1,
         126: 3,
         127: 1,
         128: 1,
         130: 2,
         131: 1,
         132: 2,
         133: 5,
         134: 1,
         136: 1,
         137: 1,
         139: 2,
         140: 2,
         141: 1,
         143: 1,
         144: 1,
         147: 3,
         148: 1,
         149: 1,
         151: 2,
         152: 1,
         153: 1,
         154: 2,
         156: 1,
         157: 1,
         158: 1,
         160: 2,
         161: 2,
         162: 2,
         163: 1,
         167: 1,
         168: 2,
         169: 1,
         181: 1,
         184: 1,
         185: 1,
         189: 1,
         190: 1,
         192: 2,
         195: 1,
         196: 1,
         199: 1,
         206: 2,
         213: 1,
         218: 1,
         220: 1,
         223: 1,
         230: 1,
         231: 1,
         237: 1,
         244: 1,
         251: 1,
         275: 1,
         283: 1,
         288: 1,
         307: 1,
         317: 1,
         319: 1,
         333: 1,
         337: 2,
         358: 1,
         391: 1,
         404: 1,
         437: 1,
         773: 1,
         1049: 1,
         1841: 1})

Size of each DB component

Here we create a single DB for each component (nodes, tags, ways, ...) Some thoughts:

  • There is not much we can about nodes in terms of improving the schema.
  • Tags on nodes, and everything to do with relations, form a small part of the total.
  • So focus on ways and their tags.

In [19]:
def make_new_db(name):
    try:
        os.remove(name)
    except Exception as e:
        print("Failed to delete {} because {}".format(name, e))
    return sqlite3.connect(name)

def close_connection(connection, name):
    connection.commit()
    connection.close()
    connection = sqlite3.connect(name, isolation_level=None)
    connection.execute("vacuum")
    connection.close()
    return os.stat(name).st_size / 1024 / 1024

In [20]:
connection = make_new_db("nodes.db")
connection.execute("create table nodes(osm_id integer primary key, longitude integer, latitude integer)")
for res in db.connection.execute("select * from nodes"):
    connection.execute("insert into nodes values(?,?,?)", tuple(res))
close_connection(connection, "nodes.db")


Out[20]:
3.72265625

In [21]:
connection = make_new_db("node_tags.db")
connection.execute("create table node_tags(osm_id integer, key text, value text)")
connection.execute("create index node_tags_osm_id_idx on node_tags(osm_id)")
for res in db.connection.execute("select * from node_tags"):
    connection.execute("insert into node_tags values(?,?,?)", tuple(res))
close_connection(connection, "node_tags.db")


Out[21]:
0.58203125

In [22]:
connection = make_new_db("ways.db")
connection.execute("create table ways(osm_id integer, position integer, noderef integer)")
connection.execute("create index ways_idx on ways(osm_id, position)")
for res in db.connection.execute("select * from ways"):
    connection.execute("insert into ways values(?,?,?)", tuple(res))
close_connection(connection, "ways.db")


Out[22]:
7.66796875

In [23]:
connection = make_new_db("way_tags.db")
connection.execute("create table way_tags(osm_id integer, key text, value text)")
connection.execute("create index way_tags_osm_id_idx on way_tags(osm_id)")
for res in db.connection.execute("select * from way_tags"):
    connection.execute("insert into way_tags values(?,?,?)", tuple(res))
close_connection(connection, "way_tags.db")


Out[23]:
2.7890625

In [24]:
connection = make_new_db("relations.db")
connection.execute("create table relations(osm_id integer, member text, memberref integer, role text)")
connection.execute("create index relations_idx on relations(osm_id)")
for res in db.connection.execute("select * from relations"):
    connection.execute("insert into relations values(?,?,?,?)", tuple(res))
close_connection(connection, "relations.db")


Out[24]:
0.2109375

In [25]:
connection = make_new_db("relation_tags.db")
connection.execute("create table relation_tags(osm_id integer, key text, value text)")
connection.execute("create index relation_tags_osm_id_idx on relation_tags(osm_id)")
for res in db.connection.execute("select * from relation_tags"):
    connection.execute("insert into relation_tags values(?,?,?)", tuple(res))
close_connection(connection, "relation_tags.db")


Out[25]:
0.08203125

Change the schema for ways: list more than one node per row. As you can see, we can reduce the disk usage quite a bit. But probably not enough to justify making the code more complicated.


In [26]:
def chunk(nodes, length):
    i = 0
    out = []
    part = []
    while i < len(nodes):
        part.append(nodes[i])
        i += 1
        if i == len(nodes):
            while len(part) < length:
                part.append(None)
            out.append(part)
            break
        if len(part) == length:
            out.append(part)
            part = []
    return out
    
assert(chunk([1,2,3,4,5,6], 2) == [[1,2],[3,4],[5,6]])
assert(chunk([1,2,3,4,5], 2) == [[1,2],[3,4],[5,None]])

Storing around 10 to 15 node references in each row would be more efficient...


In [27]:
for length in range(1,20):
    connection = make_new_db("ways_trial.db")
    create_string = ("create table ways(osm_id integer, position integer,"
        + ", ".join(["noderef"+str(i)+" integer" for i in range(length)])
        + ")" )
    insert_string = ("insert into ways values (?,?,"
        + ",".join(["?"]*length) + ")" )
    connection.execute(create_string)
    connection.execute("create index ways_idx on ways(osm_id, position)")
    for way in db.ways():
        for pos, part in enumerate(chunk(way.nodes, length)):
            data = (way.osm_id, pos, *part)
            connection.execute(insert_string, data)
    size = close_connection(connection, "ways_trial.db")
    print("{} -> {}".format(length, size))


1 -> 7.66796875
2 -> 4.61328125
3 -> 3.5390625
4 -> 3.1171875
5 -> 2.70703125
6 -> 2.5546875
7 -> 2.3984375
8 -> 2.32421875
9 -> 2.25390625
10 -> 2.2265625
11 -> 2.1953125
12 -> 2.1875
13 -> 2.17578125
14 -> 2.17578125
15 -> 2.17578125
16 -> 2.1875
17 -> 2.19140625
18 -> 2.203125
19 -> 2.2109375